import os
import re
import shutil
import urllib2
import urlparse
import datetime

import source

class malwarepatrol(source.Base):
  """Blacklist source backed by the malwarepatrol.com adblock list.

  The list is downloaded into ``log/blacklists/<date>/malwarepatrol.com``
  (at most once per instance) and parsed lazily by the ``domains`` property.
  """

  def __init__(self):
    # Flipped to True by retrieve() once the list is on disk.
    self.retrieved = False

    self.url = 'http://malwarepatrol.com/cgi/submit?action=list_adblock'
    # The date is captured here, when the instance is built, not at download.
    self.dir = os.path.join('log', 'blacklists', str(datetime.date.today()))
    self.filename = os.path.join(self.dir, 'malwarepatrol.com')

    # Dotted-quad IPv4 address. The pattern carries no anchors of its own;
    # callers must anchor it when a whole-string match is required.
    self.patterns = {"ip": r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"}

  @property
  def name(self):
    """Return the identifier of this source."""
    return 'malwarepatrol.com'

  def retrieve(self):
    """Download the list to ``self.filename`` and mark it as retrieved.

    Raises:
      OSError: if the target directory cannot be created.
      urllib2.URLError: if the download fails.
    """
    # Try-then-verify instead of exists()-then-create: another process may
    # create the directory between the check and the makedirs() call.
    try:
      os.makedirs(self.dir)
    except OSError:
      if not os.path.isdir(self.dir):
        raise

    response = urllib2.urlopen(self.url)
    try:
      with open(self.filename, 'wb') as out:
        shutil.copyfileobj(response, out)
    finally:
      # urllib2 responses are not context managers on Python 2; close the
      # connection explicitly so the socket is not leaked.
      response.close()

    self.retrieved = True

  @property
  def domains(self):
    """Yield ``{"domain": hostname}`` for every usable line of the list.

    The list is downloaded first if this instance has not retrieved it yet.
    Lines without a hostname, lines that cannot be parsed as a URL, and
    lines whose hostname is a bare IPv4 address are skipped.
    """
    if not self.retrieved:
      self.retrieve()

    # Match the WHOLE hostname. A plain re.match() on the unanchored pattern
    # also hits hostnames that merely start with a dotted quad (for example
    # "1.2.3.4.example.com") and would wrongly drop them as raw IPs.
    is_ip = re.compile(r'(?:%s)\Z' % self.patterns['ip']).match

    with open(self.filename) as f:
      for line in f:
        try:
          parsed = urlparse.urlparse(line)
        except ValueError:
          # Raised for unbalanced "[" / "]" in the network location; skip
          # the line instead of aborting the whole iteration.
          continue

        # The parsed hostname can carry the line's trailing newline, so
        # strip it before testing it and before handing it out.
        host = (parsed.hostname or '').strip()

        # skip raw IPs and unparseable hostnames
        if host and not is_ip(host):
          yield {"domain": host}
    
